% Estimation of the logp1 with Area dependce models
% covariates prediction


%% Housekeeping
clc; clear all; close all;
workpath00 = pwd;

addpath('toolbox_xt');
addpath('toolbox_plot');

%% load data
load('data_all.mat');

%% Two panel variables
% xtflag and xtval: combofips
% xtflag2 and xtval2: year
% xtflag3 and xtval3: combofips/year
xtflag3 = xtflag + xtflag2/10000;
xtval3 = unique(xtflag3);

nobs = size(YY,1);
ncity = length(xtval);
nyear = 6;

%% Data for the esitmation
% sel_est = true(nobs,1);  %include all

[~,~,info] = xt_reg(YY, ones(nobs,1), xtflag3, xtval3);
% sel_est = (info.n>=5);
sel_est = true(size(YY,1),1);

est.YY      = YY(sel_est,:);
est.xtflag  = xtflag(sel_est,:);
est.xtval   = unique(est.xtflag);
est.xtflag2 = xtflag2(sel_est,:);
est.xtval2  = unique(est.xtflag2);
est.xtflag3 = xtflag3(sel_est,:);
est.xtval3  = unique(est.xtflag3);
est.nobs    = size(xtflag,1);

% Other controls
YD = [year_dummy2, year_dummy3, year_dummy4, year_dummy5, year_dummy6];

% estimation
est.YD = YD(sel_est,:);
est.XX = XX(sel_est,:);
est.logDC = logDC(sel_est,:);
est.DD = DD(sel_est,:);

est.ZZ = ZZ(sel_est,:);
[~,temp_ZZ2] = xt_reg(est.ZZ(:,2), ones(size(est.ZZ,1),1), est.xtflag, est.xtval);
est.ZZ2 = [ones(size(temp_ZZ2,1),1), temp_ZZ2];

estinfo.islog = 2;
estinfo.FitMethod = 'REML';

%% Preliminary
nobs = size(est.YY,1);
ncity = length(est.xtval);
nyear = 6;

temp_y = est.YY;
temp_yd = est.YD;

temp_XX    = est.XX;
temp_ZZ    = est.ZZ;
temp_ZZ2   = est.ZZ2;
temp_logDC = est.logDC;

islog = estinfo.islog;
if islog == 0
    temp_d = [est.DD];
elseif islog == 1
    temp_d = [log(est.DD)];
elseif islog == 2
    temp_d = [log(1+est.DD)];
end

xtflag = est.xtflag;
xtval  = est.xtval;

xtflag2 = est.xtflag2;
xtval2  = est.xtval2;

xtflag3 = est.xtflag3;
xtval3  = est.xtval3;

ZZ  = est.ZZ;
ZZ2 = est.ZZ2;

%% Actual estimation
% Model 1: covariate depends on distance and its prior depends on area
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

estX_M1 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2)];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist)];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M1.model',num2str(vind),' = temp_m;']);
end
estX_M1.predX = tractX;

%% Actual estimation
% Model 2: covariate depends on distance and its prior depends on area
% - Same as Model 1 but it depends on distance to coast as well
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

estX_M2 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2), temp_logDC, temp_logDC.*temp_ZZ(:,2)];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d, temp_logDC]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2','ej3'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, ...
        log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist), ...
        log(tract.d2c), new_ZZ.*log(tract.d2c), ...
        ];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist), log(tract.d2c)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M2.model',num2str(vind),' = temp_m;']);
end
estX_M2.predX = tractX;

%% Actual estimation
% Model 3: covariate depends on distance and its prior depends on area
% - Same as Model 1 but it depends on distance to coast as well
% - Unlike Model 2, the coefficient on the coast variable does not vary by
% metro area.
% Things can be improved
% a) other information such as distance to coast
% b) full covariance of regression error error for X1 and error for X2 for
% example
% c) taking account for prediction uncertainty
Options = [];
Options.Display = 'iter';
Options.TolFun = 1e-12;
% Options = optimoptions('fminunc', 'TolFun', 1e-15, 'TolX', 1e-15, 'MaxFunEvals', 2000);

temp_XXX = [temp_XX, temp_XX(:,14).^2, temp_XX(:,14).^3];

estX_M3 = [];
tractX = [];
for vind = 1:1:size(temp_XXX,2)
    temp_x = [ones(nobs,1) temp_ZZ(:,2), temp_d, temp_d.*temp_ZZ(:,2), temp_logDC];
    temp_y = temp_XXX(:,vind);
    temp_m = fitlmematrix(temp_x,temp_y,{[ones(nobs,1), temp_d]},{xtflag},...
        'CovariancePattern',{'FullCholesky'},...
        'RandomEffectPredictors',{{'ej1','ej2'}},'RandomEffectGroups',{'city'},...
        'OptimizerOptions',Options, ...
        'FitMethod', estinfo.FitMethod, ...
        'Verbose', 0);
    temp_m
    
    % for prediction
    new_nobs = size(tract.combofips,1);
    new_ZZ = zeros(new_nobs,1);
    for i=1:1:size(temp_ZZ2,1)
        new_ZZ(tract.combofips==xtval(i),1) = temp_ZZ2(i,2);
    end
    
    new_x = [ones(new_nobs,1), new_ZZ, ...
        log(1+tract.min_dist), new_ZZ.*log(1+tract.min_dist), ...
        log(tract.d2c), ...
        ];
    new_z = {[ones(new_nobs,1), log(1+tract.min_dist)]};
    new_g = tract.combofips;
    
    new_y = predict(temp_m,new_x,new_z,new_g);
    tractX = [tractX,new_y];
    
    eval(['estX_M3.model',num2str(vind),' = temp_m;']);
end
estX_M3.predX = tractX;

%% Post estimation
% save
save('results_estX_REML.mat', 'estX_M1', 'estX_M2', 'estX_M3');

%% check
pred1 = estX_M1.predX;
pred2 = estX_M2.predX;
pred3 = estX_M3.predX;





